1 Load data

se <- biocrates("2022-02-22_Conc_raw_smartcare 10 patient thorax cohort_hmdb.xlsx")
se$tissue <- ""
se$tissue[grep(x = se$Sample.Description, pattern = "_TU")] <- "TU"
se$tissue[grep(x = se$Sample.Description, pattern = "_NG")] <- "NG"
se$individual <- stringr::str_remove(se$Sample.Description, pattern = "_TU|_NG")

2 Load data and filter features

Run the function metIDQ_get_high_quality_features and only retain the features that have more than 2/3 of “green”/“lightblue” values.

file <- "2022-02-22_Conc_raw_smartcare 10 patient thorax cohort_hmdb.xlsx"
features <- metIDQ_get_high_quality_features(file, threshold = 2/3)
se <- se[make.names(names(which(features))), ]

3 QC

shinyQC(se)

Exclude the following samples: PBS samples and QC samples.

exclude <- c("PBS_1", "PBS_2", "PBS_3", "Quant 500_QC1_1", "Quant 500_QC2_2",
    "Quant 500_QC2_3", "Quant 500_QC2_4", "Quant 500_QC3_5")
se <- MatrixQCvis:::selectSampleSE(se, 
    selection = exclude,
    mode = "exclude")

4 Data Transformation

Perform log-transformation on the data set.

a <- assay(se)
a[a <= 0] <- NA
a_t <- transformAssay(a, method = "log")
se <- MatrixQCvis:::updateSE(se = se, a = a_t)

saveRDS(se, file = "SummarizedExperiment_extraction_method_cohort_metabolomics.RDS")

5 Run the linear model

cD <- colData(se)
cD <- colData(se)
design <- model.matrix(~ 0 + tissue, data = cD)
colnames(design) <- make.names(colnames(design))
cor <- duplicateCorrelation(assay(se), design, block=cD$individual)
fit <- lmFit(object = assay(se), design = design, block = cD$individual,
    correlation = cor$consensus)

## create contrasts
contrasts <- makeContrasts(
    TUvsNG = ( tissueTU - tissueNG),
    levels = design)
fit_c <- contrasts.fit(fit, contrasts)
fit_eB <- eBayes(fit_c)

## set parameters for differential expression
num <- Inf
p_val <- 1
adj <- "BH"

5.1 TU vs. NG

We test here the DE proteins between NG and TU.

tT <- topTable(fit_eB, number = num, p.value = p_val, adjust.method = adj,
    coef = "TUvsNG")
rmarkdown::paged_table(tT)
sum(tT$adj.P.Val < 0.05, na.rm = TRUE)
## [1] 49
tT <- cbind(name = rownames(tT), tT)
volcanoPlot(tT)
write.table(tT, file = "metabolomics_DE_t_TUvsNG.txt", 
    quote = FALSE, sep = "\t")

5.2 Individuals as covariates

cD <- colData(se)
design <- model.matrix(~ 0 + tissue + individual, data = cD)
fit <- lmFit(object = assay(se), design = design)

## create contrasts
contrasts <- makeContrasts(
    TUvsNG = (tissueTU - tissueNG),
    levels = design)
fit_c <- contrasts.fit(fit, contrasts)
fit_eB <- eBayes(fit_c)

## set parameters for differential expression
num <- Inf
p_val <- 1
adj <- "BH"

## get the features for autoSP3
tT_covariate <- topTable(fit_eB, number = num, p.value = p_val, 
    adjust.method = adj, coef = "TUvsNG")

cor(x = tT_covariate$t[order(rownames(tT_covariate))], y = tT$t[order(rownames(tT))], 
    use = "pairwise.complete.obs")
## [1] 0.9859315

  1. European Molecular Biology Laboratory, Meyerhofstrasse 1, 69117 Heidelberg, Germany↩︎